library(tidyverse)
load("C:/BLOG/Workspaces/NIR Soil Tutorial/post4.RData")
We have seen in the previous post how to remove artifacts in the spectra using the spliceCorrection
function from the prospectr
package. But there are other sources of noise in the spectra which depends of several factors, as the quality of the spectrophotometer (can use a signal to noise ratio meassurement to test it), the scanning time, resolution, sample presentation, …. In this post we continue with the “course on applied data analytics for soil analysis with infrared spectroscopy” using the reference material provided. And this time we are going to see the part of the course where we add noise to the spectra and then we apply two methods to remove it. The methods are the Moving Window Average and the Savitzky-Golay Filter.
First we load the libraries we are going to use:
Adding noise to the first spectrum
We create some Gaussian noise to the first spectrum in the dataset (mean = 0, standard deviation = 0.0025), and we are gong to add it to the first spectrum in the dataset. Take in account that we are going to select the dataset dat$spc, which is the one we created in the previous post, so we are going to use the spectra without the artifacts.
#Set a seed for reproducibility of random noise
set.seed(801124)
# Add random noise to the first sample’s spectrum
<- dat$spc[1, ] + rnorm(ncol(dat$spc), sd = 0.0025) my_noisy_spc
If we just want to se the noise that we are adding:
ggplot(data = data.frame(wavelength = my_wavelengths, absorbance = rnorm(ncol(dat$spc), sd = 0.0025)),
aes(x = wavelength, y = absorbance)) +
geom_line(color = "blue") +
labs(title = "Noisy Spectrum",
x = "Wavelength (nm)",
y = "Absorbance") +
theme_minimal()
Plotting the noisy spectrum
ggplot(data = data.frame(wavelength = my_wavelengths, absorbance = my_noisy_spc),
aes(x = wavelength, y = absorbance)) +
geom_line(color = "blue") +
labs(title = "Spectrum 1 with added Noise",
x = "Wavelength (nm)",
y = "Absorbance") +
theme_minimal()
Removing the noise
No let´s apply to this spectrum two methods used to remove noise:
Moving Window Average
library(prospectr)
# Replace ‘my_noisy_spc’ with your actual data frame or matrix and ‘w’ with the desired
# window size
<- movav(my_noisy_spc, w = 11) mwa_result
Savitzky-Golay Filter
# Replace ‘my_noisy_spc’ with your actual data frame or matrix, ‘m’ with the window size,
# and ‘p’ with the polynomial order
<- savitzkyGolay(
sg_result # the noisy spectrum
my_noisy_spc, m = 0, # this m is for the derivative order, 0 is for no derivative
p = 3, # the polynomial order
w = 11 # the window size
)
# Create a long-format data frame for the MVA spectrum
<- data.frame(
long_mwa_spc wavelength = as.numeric(names(mwa_result)), # Extract the wavelengths
spectral_value = mwa_result, # Store the spectral values from the MVA
method = 'mva' # Label the data
)# Create a long-format data frame for the Savitzky-Golay (SG) spectrum
<- data.frame(
long_sg_spc wavelength = as.numeric(names(sg_result)), # Extract the wavelengths
spectral_value = sg_result, # Store the spectral values from the SG
method = 'sg' # Label the data
)# Combine the long-format MVA and SG data frames into one
<- rbind(long_mwa_spc, long_sg_spc) denoised_long
Now we create a data frame for the original noisy spectrum and some labels.
# Create a data frame for the original noisy spectrum
<- data.frame(
original_noisy wavelength = my_wavelengths, # Use the original wavelengths
spectral_value = my_noisy_spc # Use the original noisy spectrum values
)
<- c(
my_labels 'mva' = 'Moving average smoothing',
'sg' = 'Savitzky-Golay filtering'
)
Now we want to overplot the corrected spectra over the noisy one:
# Plot the spectra using ggplot2
ggplot() +
# Plot the original noisy spectrum as a red line in the background
geom_line(
data = original_noisy,
aes(x = wavelength, y = spectral_value),
color = 'black'
+
) # Plot the MVA and SG processed spectra on top of the original noisy spectrum
geom_line(
data = denoised_long,
aes(x = wavelength, y = spectral_value, color = method, group = method)
+
) # Add titles and labels for the axes
labs(
x = 'Wavelength (nm)',
y = 'Reflectance'
+
) #coord_cartesian(xlim = c(1500, 1800), ylim = c(0.425, 0.450)) +
# Manually set the colors for the MVA and SG lines
scale_color_manual(
values = c('mva' = '#00AFBB', 'sg' = '#E7B800'),
labels = my_labels
+
) # Facet the plot by the method, creating separate plots for MVA and SG
facet_grid(. ~ method, labeller = labeller(method = my_labels)) +
theme(legend.position = 'top') + guides(color = guide_legend(title = NULL))
We can zoom the spectra to see the differences more clearly:
ggplot() +
# Plot the original noisy spectrum as a red line in the background
geom_line(
data = original_noisy,
aes(x = wavelength, y = spectral_value),
color = 'black'
+
) # Plot the MVA and SG processed spectra on top of the original noisy spectrum
geom_line(
data = denoised_long,
aes(x = wavelength, y = spectral_value, color = method, group = method), size = 1
+
) # Add titles and labels for the axes
labs(
x = 'Wavelength (nm)',
y = 'Reflectance'
+
) coord_cartesian(xlim = c(1500, 1800), ylim = c(0.425, 0.450)) +
# Manually set the colors for the MVA and SG lines
scale_color_manual(
values = c('mva' = '#00AFBB', 'sg' = '#E7B800'),
labels = my_labels
+
) # Facet the plot by the method, creating separate plots for MVA and SG
facet_grid(. ~ method, labeller = labeller(method = my_labels)) +
theme(legend.position = 'top') + guides(color = guide_legend(title = NULL))
Bibliography:
Soil spectroscopy training material Wadoux, A., Ramirez-Lopez, L., Ge, Y., Barra, I. & Peng, Y. 2025. A course on applied data analytics for soil analysis with infrared spectroscopy – Soil spectroscopy training manual 2. Rome, FAO.